/*********************************************************************
	Project: RADV CPI FFSA
    Program: Tables Unadjusted.sas
	Purpose: The following tables are created for the unadjusted data, original run of the regression model:

	Tables:	1	The dollar coefficients for each disease coefficient and demographic
			2	Risk factors, which are the dollar coefficients divided by the average expenditures (average expenditures are calculated in the PROC MEANS below the PROC REG)
			3	The predicted expenditures for each beneficiary - both sample benes and MA enrollees (should be about 3.4 million records)
			4	The actual expenditures (apy05commabad) for each beneficiary - just the sample benes
			5	Count of sample beneficiaries with the disease coefficient/demographic for each disease coefficient/demographic (i.e. number of enrollees with the disease coefficient/demographic set to 1)
			6	The average risk for each disease coefficient and demographic. Defined as the product of the estimated coefficient for disease 
				coefficient/demographic j and the number of sample beneficiaries with disease coefficient/demographic j 
				all divided by the number of sample beneficiaries. 
			7	The relative factor for each disease coefficient and demographic (RFj). Defined as the average risk for HCC j divided by the 
				average actual expenditures (average calculated over sample beneficiaries)
			8	The sum of the relative factors for each disease coefficient/demographic. Defined as the product of the relative factor for 
				disease coefficient/demographic j and the number of sample beneficiaries with disease coefficient/demographic j
			9	The risk scores for each sample beneficiary (based on relative factors). Defined as the sum of the products of the relative factor 
				for each disease coefficient/demographic j and the indicator of the disease coefficient/demographic j over all disease coefficients/demographics
            10	The risk scores for each sample beneficiary (based on risk factors). Defined as the sum of the products of the risk factors and the HCC indicators

*********************************************************************/

libname out "Z:\...\Table Output";

libname out1 "Z:\...\Table Output\Table_1";
libname out2 "Z:\...\Table Output\Table_2";
libname out3 "Z:\...\Table Output\Table_3";
libname out4 "Z:\...\Table Output\Table_4";
libname out5 "Z:\...\Table Output\Table_5";
libname out6 "Z:\...\Table Output\Table_6";
libname out7 "Z:\...\Table Output\Table_7";
libname out8 "Z:\...\Table Output\Table_8";
libname out9 "Z:\...\Table Output\Table_9";
libname out10 "Z:\...\Table Output\Table_10";

libname dat1 "Z:\...\FFSA Calibration";

options mprint;

%macro createtables_orig(type,rep);

/***********************************************/
/*Table 1 Original*/
	
	proc transpose data=estcoeffs1 out=out1.table_1_&type._&rep. (rename=(col1=coefficient _name_=HCC_interaction _label_=label));
	run;


/*Table 2 Original*/
	proc means data=predictions1 noprint;
	   var apy05commabad predicted;
	   output out=mean_predictions1(drop=_type_ _freq_) mean=mean_expense mean_predicted;
	   where apy05commabad~=.; /* Removes MA enrollees from the calculations. */
	   title2 "Original FFS Data, Average Expenses, and Predicted Expenses";
	run;

	proc sql;
	create table out2.table_2_&type._&rep. as
	select hcc_interaction,
			label,
			coefficient/mean_expense as risk_factor
	from out1.table_1_&type._&rep., mean_predictions1;
	quit;


/*Table 3 Original*/
	data out3.table_3_&type._&rep.; 
	   set predictions1 (keep=hicno ma_enrollee_number predicted);
	run;


/*Table 4 Original*/
	data out4.table_4_&type._&rep.;
	   set pophcc (keep=hicno apy05commabad);
	run;


/*Table 5 Original*/
	proc means data=pophcc sum noprint;
		var f0_34 -- f95_gt m0_34 -- m95_gt hcc1--hcc177 d_hcc5 d_hcc44 d_hcc51 d_hcc52 d_hcc107 dm_chf1 dm_cvd_70hccs
                         chf_copd copd_cvd_cad_70hccs rf_chf1 rf_chf_dm;
		output out=table_5_&type._&rep. sum= ;
	run;

	proc transpose data=table_5_&type._&rep. out=out5.table_5_&type._&rep. (rename=(col1=count _name_=HCC_interaction _label_=label));
	var f0_34 -- f95_gt m0_34 -- m95_gt hcc: d_hcc5 d_hcc44 d_hcc51 d_hcc52 d_hcc107 dm_chf1 dm_cvd_70hccs
                         chf_copd copd_cvd_cad_70hccs rf_chf1 rf_chf_dm;
	run;


/*Table 6 Original*/
	proc sql;
	create table out6.table_6_&type._&rep. as
	select a.hcc_interaction, a.label,
			coefficient*count/1441247 as avg_risk  /* 1441247 is number of sample benes */
	from out1.table_1_&type._&rep. as a full join out5.table_5_&type._&rep. as b
	on a.hcc_interaction=b.hcc_interaction;
	quit;

/*Table 7 Original*/
	proc sql;
	create table out7.table_7_&type._&rep. as
	select hcc_interaction,
			label,
			avg_risk/mean_expense as relative_factor
	from out6.table_6_&type._&rep., mean_predictions1;
	quit;


/*Table 8 Original*/
	proc sql;
	create table out8.table_8_&type._&rep. as
	select a.hcc_interaction, a.label,
			relative_factor*count as sum_rel_factor
	from out7.table_7_&type._&rep. as a full join out5.table_5_&type._&rep. as b
	on a.hcc_interaction=b.hcc_interaction;
	quit;


/*Table 9 Original */

   proc transpose data=out7.table_7_&type._&rep. out=table_7_prime(drop=_name_);
   run;

   data out9.table_9_&type._&rep.;
      set pophcc;
      if _n_=1 then set table_7_prime;
      array rf {105} col1--col105;
      array hcc {105} f0_34 -- f95_gt m0_34 -- m95_gt hcc1--hcc177 d_hcc5 d_hcc44 d_hcc51 d_hcc52 d_hcc107 dm_chf1 dm_cvd_70hccs
                     chf_copd copd_cvd_cad_70hccs rf_chf1 rf_chf_dm;
      risk_score=0;
      do i=1 to 105;
         risk_score=risk_score+rf [i]*hcc [i]; /* disease risk score for each bene based on 
	                                              relative factors */
      end;
      keep hicno risk_score;
   run;

/* Table 10 Original */

   proc transpose data=out2.table_2_&type._&rep. out=table_2_prime(drop=_name_);
   run;

   data out10.table_10_&type._&rep.;
      set pophcc;
      if _n_=1 then set table_2_prime;
      array nc {105} col1--col105;
      array hcc {105} f0_34 -- f95_gt m0_34 -- m95_gt hcc1--hcc177 d_hcc5 d_hcc44 d_hcc51 d_hcc52 d_hcc107 dm_chf1 dm_cvd_70hccs
                         chf_copd copd_cvd_cad_70hccs rf_chf1 rf_chf_dm;
      risk_score=0;
      do i=1 to 105;
         risk_score=risk_score+nc [i]*hcc [i]; /* disease risk score for each bene based on 
	                                              risk factors. */
      end;
      keep hicno risk_score;
   run;

%mend createtables_orig;


/* Input Datasets */
	data pophcc;
	   set dat1.y5r1s15f;
	run;
	data masterma(rename=(originally_disabled_female_aged=originallydisabled_female 
	                      originally_disabled_male_aged=originallydisabled_male
	                      dm_cvd=dm_cvd_70hccs copd_cvd_cad=copd_cvd_cad_70hccs));
	   set dat1.samptb_y13_full1m dat1.samptb_y13_elig1m;
	   length f0_34 -- f95_gt m0_34 -- m95_gt 4 hcc1 -- hcc177 3;
	   apy05commabad=.;
	   newexpenditures=.;
	   ma_enrollee_number=_n_;
	   keep apy05commabad newexpenditures f0_34 -- f95_gt m0_34 -- m95_gt hcc1 -- hcc177
	        originally_disabled_female_aged 
	        medicaid_female_aged medicaid_female_disabled  originally_disabled_male_aged 
	        medicaid_male_Aged medicaid_male_disabled d_hcc5 d_hcc44 d_hcc51
	        d_hcc52 d_hcc107 dm_chf1 dm_cvd chf_copd copd_cvd_cad rf_chf1 rf_chf_dm
	        ma_enrollee_number;
	run;
	data pophcc_ma;
	   set pophcc masterma;
	run;

    proc reg data=pophcc_ma outest=estcoeffs1(drop=_type_ _model_ _rmse_ _depvar_ apy05commabad);
	   model apy05commabad = f0_34 -- f95_gt m0_34 -- m95_gt hcc1 -- hcc177 
	                         d_hcc5 d_hcc44 d_hcc51 d_hcc52 d_hcc107 dm_chf1 dm_cvd_70hccs
	                         chf_copd copd_cvd_cad_70hccs rf_chf1 rf_chf_dm/noint; 
					                              /* HCCs, demos, interactions */
	   output out=predictions1 (keep=hicno ma_enrollee_number apy05commabad predicted) p=predicted;
	   title2 "Regression on Original FFS Data";
	run;

%createtables_orig(orig,0); /* Output tables 1-10 */

quit;
